{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "def evaluate(y_pred, y_true):\n",
    "    \"\"\"Score predictions against reference labels.\n",
    "\n",
    "    Returns the tuple ``(acc, prec, rec, f1)``: plain accuracy followed by\n",
    "    macro-averaged precision, recall and F1, all computed with\n",
    "    ``sklearn.metrics``.\n",
    "\n",
    "    Mind the argument order: predictions come first in this signature,\n",
    "    whereas the scikit-learn scorers called below take the reference\n",
    "    labels first.\n",
    "    \"\"\"\n",
    "    macro_scorers = (metrics.precision_score,\n",
    "                     metrics.recall_score,\n",
    "                     metrics.f1_score)\n",
    "    accuracy = metrics.accuracy_score(y_true, y_pred)\n",
    "    macro_scores = [scorer(y_true, y_pred, average='macro')\n",
    "                    for scorer in macro_scorers]\n",
    "    return tuple([accuracy] + macro_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "def _notebook_global(name):\n",
    "    \"\"\"Look up a notebook-level variable, raising NameError when it is unset.\"\"\"\n",
    "    try:\n",
    "        return globals()[name]\n",
    "    except KeyError:\n",
    "        raise NameError(\"global name '{}' is not defined\".format(name))\n",
    "\n",
    "\n",
    "def _score_record(frame, truth_col, method, pct_missing, split, label):\n",
    "    \"\"\"Score frame['y_pred'] against frame[truth_col] and build one record dict.\"\"\"\n",
    "    acc, prec, rec, f1 = evaluate(frame['y_pred'], frame[truth_col])\n",
    "    return dict(method=method,\n",
    "                pct_missing=pct_missing,\n",
    "                split=split,\n",
    "                label=label,\n",
    "                acc=acc,\n",
    "                prec=prec,\n",
    "                rec=rec,\n",
    "                f1=f1)\n",
    "\n",
    "\n",
    "def process(predictions, method=None, pct_missing=None):\n",
    "    \"\"\"Evaluate every prediction CSV and return a flat list of metric records.\n",
    "\n",
    "    Each file is read with ``pd.read_csv`` and must provide the columns\n",
    "    ``split``, ``y_pred``, ``y_true`` and ``y_label``. Three records are\n",
    "    produced per file, in this order:\n",
    "\n",
    "    1. rows with split == 'train', y_pred vs y_true   (label='true')\n",
    "    2. rows with split == 'train', y_pred vs y_label  (label='label')\n",
    "    3. rows with split == 'test',  y_pred vs y_true   (label='true')\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    predictions : iterable\n",
    "        Paths (or anything ``pd.read_csv`` accepts) of prediction files.\n",
    "    method, pct_missing : optional\n",
    "        Tags copied verbatim into every record. When left as ``None`` the\n",
    "        notebook-level variable of the same name is used, which is how this\n",
    "        function always obtained them; passing them explicitly removes the\n",
    "        dependency on whichever cell last assigned those globals.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of dict\n",
    "        Records with keys method, pct_missing, split, label, acc, prec,\n",
    "        rec and f1.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    NameError\n",
    "        If a tag is not passed and the matching global is not defined.\n",
    "    \"\"\"\n",
    "    if method is None:\n",
    "        method = _notebook_global('method')\n",
    "    if pct_missing is None:\n",
    "        pct_missing = _notebook_global('pct_missing')\n",
    "\n",
    "    # (split, ground-truth column, label tag) in the order records are emitted.\n",
    "    specs = (('train', 'y_true', 'true'),\n",
    "             ('train', 'y_label', 'label'),\n",
    "             ('test', 'y_true', 'true'))\n",
    "\n",
    "    records = []\n",
    "    for fcsv in predictions:\n",
    "        df = pd.read_csv(fcsv)\n",
    "        for split, truth_col, label in specs:\n",
    "            subset = df[df['split'] == split]\n",
    "            records.append(_score_record(subset, truth_col, method,\n",
    "                                         pct_missing, split, label))\n",
    "    return records"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['logs/round1/prediction_cifar110_vgg8.csv']\n",
      "       acc        f1  label    method  pct_missing      prec       rec  split\n",
      "0  0.90655  0.892779   true  complete          0.0  0.929700  0.865649  train\n",
      "1  0.72402  0.619262  label  complete          0.0  0.510247  0.840993  train\n",
      "2  0.86665  0.841869   true  complete          0.0  0.888834  0.810482   test\n"
     ]
    }
   ],
   "source": [
    "dataset = 'cifar110'\n",
    "model = 'vgg8'\n",
    "method = 'complete'\n",
    "pct_missing = 0.\n",
    "rounds = ['round1']\n",
    "# For this method the prediction file names carry no method/pct_missing suffix.\n",
    "predictions = [\n",
    "    'logs/{}/prediction_{}_{}.csv'.format(rnd, dataset, model)\n",
    "    for rnd in rounds]\n",
    "evaluation = 'logs/evaluation_{}_{}.csv'.format(dataset, model)\n",
    "print(predictions)\n",
    "records = process(predictions)\n",
    "dfeval = pd.DataFrame(records)\n",
    "print(dfeval)\n",
    "# The evaluation file is opened in append mode and the other method cells\n",
    "# write to the same path, so emit the CSV header only when the file is\n",
    "# missing or empty. Otherwise every run would insert another header line\n",
    "# in the middle of the data.\n",
    "write_header = not (os.path.isfile(evaluation) and os.path.getsize(evaluation) > 0)\n",
    "dfeval.to_csv(evaluation, index=False, mode='a', header=write_header)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['logs/round2/prediction_cifar110_vgg8_pu_0.5.csv', 'logs/round3/prediction_cifar110_vgg8_pu_0.5.csv']\n",
      "       acc        f1  label method  pct_missing      prec       rec  split\n",
      "0  0.67825  0.531185   true     pu          0.5  0.953374  0.417831  train\n",
      "1  0.78125  0.510123  label     pu          0.5  0.618192  0.469572  train\n",
      "2  0.66105  0.497196   true     pu          0.5  0.941629  0.388309   test\n",
      "3  0.67496  0.529849   true     pu          0.5  0.950470  0.411865  train\n",
      "4  0.79361  0.534090  label     pu          0.5  0.639168  0.486366  train\n",
      "5  0.65400  0.486697   true     pu          0.5  0.938322  0.375082   test\n"
     ]
    }
   ],
   "source": [
    "dataset = 'cifar110'\n",
    "model = 'vgg8'\n",
    "method = 'pu'\n",
    "pct_missing = 0.5\n",
    "# Only the rounds listed here are evaluated; edit the list to score others.\n",
    "rounds = ['round2', 'round3']\n",
    "predictions = [\n",
    "    'logs/{}/prediction_{}_{}_{}_{}.csv'.format(rnd, dataset, model, method, pct_missing)\n",
    "    for rnd in rounds]\n",
    "evaluation = 'logs/evaluation_{}_{}.csv'.format(dataset, model)\n",
    "print(predictions)\n",
    "records = process(predictions)\n",
    "dfeval = pd.DataFrame(records)\n",
    "print(dfeval)\n",
    "# The evaluation file is opened in append mode and the other method cells\n",
    "# write to the same path, so emit the CSV header only when the file is\n",
    "# missing or empty. Otherwise every run would insert another header line\n",
    "# in the middle of the data.\n",
    "write_header = not (os.path.isfile(evaluation) and os.path.getsize(evaluation) > 0)\n",
    "dfeval.to_csv(evaluation, index=False, mode='a', header=write_header)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "acc            0.657525\n",
      "f1             0.491947\n",
      "pct_missing    0.500000\n",
      "prec           0.939976\n",
      "rec            0.381695\n",
      "dtype: float64\n",
      "acc            0.004985\n",
      "f1             0.007423\n",
      "pct_missing    0.000000\n",
      "prec           0.002338\n",
      "rec            0.009353\n",
      "dtype: float64\n",
      "acc            0.676605\n",
      "f1             0.530517\n",
      "pct_missing    0.500000\n",
      "prec           0.951922\n",
      "rec            0.414848\n",
      "dtype: float64\n",
      "acc            0.002326\n",
      "f1             0.000945\n",
      "pct_missing    0.000000\n",
      "prec           0.002054\n",
      "rec            0.004218\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# Mean and standard deviation of the metrics across the evaluated files.\n",
    "# dfeval also holds text columns (method, split, label); numeric_only=True\n",
    "# restricts the aggregation to the metric columns explicitly instead of\n",
    "# relying on pandas dropping text columns silently, which newer pandas\n",
    "# releases no longer do.\n",
    "test_rows = dfeval[dfeval['split']=='test']\n",
    "print(test_rows.mean(numeric_only=True))\n",
    "print(test_rows.std(numeric_only=True))\n",
    "train_true_rows = dfeval[(dfeval['split']=='train') & (dfeval['label']=='true')]\n",
    "print(train_true_rows.mean(numeric_only=True))\n",
    "print(train_true_rows.std(numeric_only=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['logs/round2/prediction_cifar110_vgg8_hb_0.5.csv', 'logs/round3/prediction_cifar110_vgg8_hb_0.5.csv']\n",
      "       acc        f1  label method  pct_missing      prec       rec  split\n",
      "0  0.82025  0.807477   true     hb          0.5  0.777370  0.843822  train\n",
      "1  0.64934  0.566379  label     hb          0.5  0.451396  0.858840  train\n",
      "2  0.79975  0.779914   true     hb          0.5  0.753525  0.812800   test\n",
      "3  0.82095  0.807968   true     hb          0.5  0.785751  0.839498  train\n",
      "4  0.65348  0.569285  label     hb          0.5  0.456205  0.854135  train\n",
      "5  0.79900  0.778309   true     hb          0.5  0.760227  0.807264   test\n"
     ]
    }
   ],
   "source": [
    "dataset = 'cifar110'\n",
    "model = 'vgg8'\n",
    "method = 'hb'\n",
    "pct_missing = 0.5\n",
    "# Only the rounds listed here are evaluated; edit the list to score others.\n",
    "rounds = ['round2', 'round3']\n",
    "predictions = [\n",
    "    'logs/{}/prediction_{}_{}_{}_{}.csv'.format(rnd, dataset, model, method, pct_missing)\n",
    "    for rnd in rounds]\n",
    "evaluation = 'logs/evaluation_{}_{}.csv'.format(dataset, model)\n",
    "print(predictions)\n",
    "records = process(predictions)\n",
    "dfeval = pd.DataFrame(records)\n",
    "print(dfeval)\n",
    "# The evaluation file is opened in append mode and the other method cells\n",
    "# write to the same path, so emit the CSV header only when the file is\n",
    "# missing or empty. Otherwise every run would insert another header line\n",
    "# in the middle of the data.\n",
    "write_header = not (os.path.isfile(evaluation) and os.path.getsize(evaluation) > 0)\n",
    "dfeval.to_csv(evaluation, index=False, mode='a', header=write_header)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "acc            0.799375\n",
      "f1             0.779111\n",
      "pct_missing    0.500000\n",
      "prec           0.756876\n",
      "rec            0.810032\n",
      "dtype: float64\n",
      "acc            0.000530\n",
      "f1             0.001135\n",
      "pct_missing    0.000000\n",
      "prec           0.004739\n",
      "rec            0.003915\n",
      "dtype: float64\n",
      "acc            0.820600\n",
      "f1             0.807723\n",
      "pct_missing    0.500000\n",
      "prec           0.781560\n",
      "rec            0.841660\n",
      "dtype: float64\n",
      "acc            0.000495\n",
      "f1             0.000347\n",
      "pct_missing    0.000000\n",
      "prec           0.005926\n",
      "rec            0.003057\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# Mean and standard deviation of the metrics across the evaluated files.\n",
    "# dfeval also holds text columns (method, split, label); numeric_only=True\n",
    "# restricts the aggregation to the metric columns explicitly instead of\n",
    "# relying on pandas dropping text columns silently, which newer pandas\n",
    "# releases no longer do.\n",
    "test_rows = dfeval[dfeval['split']=='test']\n",
    "print(test_rows.mean(numeric_only=True))\n",
    "print(test_rows.std(numeric_only=True))\n",
    "train_true_rows = dfeval[(dfeval['split']=='train') & (dfeval['label']=='true')]\n",
    "print(train_true_rows.mean(numeric_only=True))\n",
    "print(train_true_rows.std(numeric_only=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       acc        f1  label method  pct_missing      prec       rec  split\n",
      "0  0.67496  0.529849   true     pu          0.5  0.950470  0.411865  train\n",
      "1  0.79361  0.534090  label     pu          0.5  0.639168  0.486366  train\n",
      "2  0.65400  0.486697   true     pu          0.5  0.938322  0.375082   test\n"
     ]
    }
   ],
   "source": [
    "# Score one prediction file, addressed by a bare relative file name\n",
    "# (no logs/<round>/ prefix), and show the metrics without saving them.\n",
    "dataset, model = 'cifar110', 'vgg8'\n",
    "method, pct_missing = 'pu', 0.5\n",
    "predictions = [\n",
    "    'prediction_{}_{}_{}_{}.csv'.format(dataset, model, method, pct_missing),\n",
    "]\n",
    "records = process(predictions)\n",
    "dfeval = pd.DataFrame(records)\n",
    "print(dfeval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
